/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
/**
 * GroupByDesc.
 *
 * Serializable descriptor holding the settings of a group-by operator: the
 * aggregation {@link Mode}, the grouping key expressions, the aggregations,
 * the output column names, two boolean flags and two memory-related float
 * settings.
 */
@Explain(displayName = "Group By Operator")
public class GroupByDesc implements java.io.Serializable {
  private static final long serialVersionUID = 1L;

  /**
   * Group-by Mode.
   * <ul>
   * <li>COMPLETE: complete 1-phase aggregation: iterate, terminate</li>
   * <li>PARTIAL1: partial aggregation - first phase: iterate, terminatePartial</li>
   * <li>PARTIAL2: partial aggregation - second phase: merge, terminatePartial</li>
   * <li>PARTIALS: for non-distinct the same as PARTIAL2, for distinct the same
   * as PARTIAL1</li>
   * <li>FINAL: partial aggregation - final phase: merge, terminate</li>
   * <li>HASH: for non-distinct the same as PARTIAL1 but use hash-table-based
   * aggregation</li>
   * <li>MERGEPARTIAL: FINAL for non-distinct aggregations, COMPLETE for
   * distinct aggregations</li>
   * </ul>
   */
  public enum Mode {
    COMPLETE, PARTIAL1, PARTIAL2, PARTIALS, FINAL, HASH, MERGEPARTIAL
  }

  /** Aggregation mode; stays {@code null} until set when the no-arg constructor is used. */
  private Mode mode;
  /** Flag stored and returned as-is; NOTE(review): exact semantics are not visible in this file. */
  private boolean groupKeyNotReductionKey;
  /** Bucket-group flag; the setter names its argument {@code dataSorted} - TODO confirm meaning. */
  private boolean bucketGroup;
  /** Grouping key expressions. */
  private ArrayList<ExprNodeDesc> keys;
  /** Aggregations evaluated by this group-by. */
  private ArrayList<AggregationDesc> aggregators;
  /** Names of the output columns. */
  private ArrayList<String> outputColumnNames;
  /** Memory usage setting; presumably a fraction of available memory - TODO confirm unit. */
  private float groupByMemoryUsage;
  /** Memory threshold setting; NOTE(review): unit and use are not visible in this file. */
  private float memoryThreshold;

  /**
   * Creates an empty descriptor. All reference fields are {@code null} until
   * populated through the setters.
   */
  public GroupByDesc() {
  }

  /**
   * Creates a descriptor with {@code bucketGroup} set to {@code false}.
   *
   * @param mode aggregation mode
   * @param outputColumnNames names of the output columns
   * @param keys grouping key expressions
   * @param aggregators aggregations to evaluate
   * @param groupKeyNotReductionKey value for the flag of the same name
   * @param groupByMemoryUsage memory usage setting
   * @param memoryThreshold memory threshold setting
   */
  public GroupByDesc(
      final Mode mode,
      final ArrayList<String> outputColumnNames,
      final ArrayList<ExprNodeDesc> keys,
      final ArrayList<AggregationDesc> aggregators,
      final boolean groupKeyNotReductionKey,
      final float groupByMemoryUsage,
      final float memoryThreshold) {
    this(mode, outputColumnNames, keys, aggregators, groupKeyNotReductionKey,
        false, groupByMemoryUsage, memoryThreshold);
  }

  /**
   * Creates a fully specified descriptor. The given lists are stored by
   * reference, not copied.
   *
   * @param mode aggregation mode
   * @param outputColumnNames names of the output columns
   * @param keys grouping key expressions
   * @param aggregators aggregations to evaluate
   * @param groupKeyNotReductionKey value for the flag of the same name
   * @param bucketGroup value for the bucket-group flag
   * @param groupByMemoryUsage memory usage setting
   * @param memoryThreshold memory threshold setting
   */
  public GroupByDesc(
      final Mode mode,
      final ArrayList<String> outputColumnNames,
      final ArrayList<ExprNodeDesc> keys,
      final ArrayList<AggregationDesc> aggregators,
      final boolean groupKeyNotReductionKey,
      final boolean bucketGroup,
      final float groupByMemoryUsage,
      final float memoryThreshold) {
    this.mode = mode;
    this.outputColumnNames = outputColumnNames;
    this.keys = keys;
    this.aggregators = aggregators;
    this.groupKeyNotReductionKey = groupKeyNotReductionKey;
    this.bucketGroup = bucketGroup;
    this.groupByMemoryUsage = groupByMemoryUsage;
    this.memoryThreshold = memoryThreshold;
  }

  /**
   * @return the aggregation mode, or {@code null} if none has been set
   */
  public Mode getMode() {
    return mode;
  }

  /**
   * Returns the lower-case name of the current mode.
   *
   * @return the mode name in lower case, or {@code "unknown"} when no mode has
   *         been set or the mode is not one of the handled constants
   */
  @Explain(displayName = "mode")
  public String getModeString() {
    // A switch on a null enum reference throws NullPointerException; a
    // descriptor built with the no-arg constructor has no mode yet, so report
    // it through the same fallback used for unhandled values.
    if (mode == null) {
      return "unknown";
    }
    switch (mode) {
    case COMPLETE:
      return "complete";
    case PARTIAL1:
      return "partial1";
    case PARTIAL2:
      return "partial2";
    case PARTIALS:
      return "partials";
    case HASH:
      return "hash";
    case FINAL:
      return "final";
    case MERGEPARTIAL:
      return "mergepartial";
    default:
      return "unknown";
    }
  }

  /**
   * @param mode the aggregation mode to use
   */
  public void setMode(final Mode mode) {
    this.mode = mode;
  }

  /**
   * @return the grouping key expressions (the stored list itself, not a copy)
   */
  @Explain(displayName = "keys")
  public ArrayList<ExprNodeDesc> getKeys() {
    return keys;
  }

  /**
   * @param keys the grouping key expressions; stored by reference
   */
  public void setKeys(final ArrayList<ExprNodeDesc> keys) {
    this.keys = keys;
  }

  /**
   * @return the output column names (the stored list itself, not a copy)
   */
  @Explain(displayName = "outputColumnNames")
  public ArrayList<String> getOutputColumnNames() {
    return outputColumnNames;
  }

  /**
   * @param outputColumnNames the output column names; stored by reference
   */
  public void setOutputColumnNames(
      final ArrayList<String> outputColumnNames) {
    this.outputColumnNames = outputColumnNames;
  }

  /**
   * @return the memory usage setting
   */
  public float getGroupByMemoryUsage() {
    return groupByMemoryUsage;
  }

  /**
   * @param groupByMemoryUsage the memory usage setting
   */
  public void setGroupByMemoryUsage(final float groupByMemoryUsage) {
    this.groupByMemoryUsage = groupByMemoryUsage;
  }

  /**
   * @return the memory threshold setting
   */
  public float getMemoryThreshold() {
    return memoryThreshold;
  }

  /**
   * @param memoryThreshold the memory threshold setting
   */
  public void setMemoryThreshold(final float memoryThreshold) {
    this.memoryThreshold = memoryThreshold;
  }

  /**
   * @return the aggregations (the stored list itself, not a copy)
   */
  @Explain(displayName = "aggregations")
  public ArrayList<AggregationDesc> getAggregators() {
    return aggregators;
  }

  /**
   * @param aggregators the aggregations; stored by reference
   */
  public void setAggregators(
      final ArrayList<AggregationDesc> aggregators) {
    this.aggregators = aggregators;
  }

  /**
   * @return the value of the {@code groupKeyNotReductionKey} flag
   */
  public boolean getGroupKeyNotReductionKey() {
    return groupKeyNotReductionKey;
  }

  /**
   * @param groupKeyNotReductionKey new value of the flag
   */
  public void setGroupKeyNotReductionKey(final boolean groupKeyNotReductionKey) {
    this.groupKeyNotReductionKey = groupKeyNotReductionKey;
  }

  /**
   * @return the value of the bucket-group flag
   */
  @Explain(displayName = "bucketGroup")
  public boolean getBucketGroup() {
    return bucketGroup;
  }

  /**
   * @param dataSorted new value of the bucket-group flag
   */
  public void setBucketGroup(boolean dataSorted) {
    bucketGroup = dataSorted;
  }

  /**
   * Checks if this grouping is like distinct, which means that all
   * non-distinct aggregations behave like they were distinct - for example
   * min and max operators.
   *
   * Aggregations flagged as distinct are skipped. Every other aggregation
   * must have an evaluator whose class carries a {@link UDFType} annotation
   * with {@code distinctLike()} returning true. The aggregation list must
   * have been set before this method is called.
   *
   * @return {@code false} as soon as a non-distinct aggregation fails the
   *         check, {@code true} otherwise (including when the list is empty)
   */
  public boolean isDistinctLike() {
    for (AggregationDesc aggregation : getAggregators()) {
      if (aggregation.getDistinct()) {
        continue;
      }
      GenericUDAFEvaluator evaluator = aggregation.getGenericUDAFEvaluator();
      UDFType udfType = evaluator.getClass().getAnnotation(UDFType.class);
      if (udfType == null || !udfType.distinctLike()) {
        return false;
      }
    }
    return true;
  }
}